Author: Amanda Everitt
Began: 8/22/2018
Finished:: 8/23/2018

[Motivation]

At the conclusion of the last script we ran 5 different DEX comparisons - Neuronal group 2 vs group 1 - Neuronal WT vs Neuronal NULL - Neuronal WT vs Neuronal HET - Tbr1+ Neuronal WT vs Tbr1+ Neuronal NULL - Tbr1+ Neuronal WT vs Tbr1+ Neuronal HET

Lets see how the compare to each other and if there is a Tbr1 dosage effect

How many DEX genes do we have?

full_list <- list()
DE_list <- list()
hcDE_list <- list()
GENEoverlap = data.frame(Gene=character(), File=character())

for (i in list.files(path=paste0(wd,"/outputs/output_04"), pattern="*.csv", full.names = T)){
  name = tools::file_path_sans_ext(basename(i))
  cat(name,"\n")
  full.file = read.csv(i, row.names = 1)
  rownames(full.file) <- full.file$gene
  full_list[[name]] <- full.file
  
  dex.file = full.file[full.file$fdr < 0.05, ]
  cat(nrow(dex.file), "dex genes\n")
  DE_list[[name]] <- dex.file
  write.csv(dex.file, paste0(out_dir, "/DE_", name, ".csv"))
  
  hc.dex.file = dex.file[abs(dex.file$logfc) > 1 & is.na(dex.file$logfc) == FALSE, ]
  cat(nrow(hc.dex.file), "dex genes logfc > or < 1\n")
  hcDE_list[[name]] <- hc.dex.file
  write.csv(hc.dex.file, paste0(out_dir, "/hcDE", name, ".csv"))
  
  temp <- data.frame(Gene=dex.file$gene, 
                     File=rep(name, length(rownames(dex.file))), 
                     Direction=ifelse(dex.file$logfc >0 , 'UP', 'DOWN')
                     )
  GENEoverlap <- merge(x=GENEoverlap, y=temp, all.x=TRUE, all.y=TRUE)
}
## Group2vsGroup1 
## 292 dex genes
## 46 dex genes logfc > or < 1
## Tbr1_WTvsTbr1_HET 
## 8 dex genes
## 7 dex genes logfc > or < 1
## Tbr1_WTvsTbr1_NULL 
## 8 dex genes
## 8 dex genes logfc > or < 1
## WTvsHET 
## 320 dex genes
## 51 dex genes logfc > or < 1
## WTvsNULL 
## 470 dex genes
## 44 dex genes logfc > or < 1
GENEoverlap <- ddply(GENEoverlap, .(Gene,Direction), summarize, Count=length(unique(File)), Files=paste(unique(File),collapse=","))
write.csv(GENEoverlap, paste0(out_dir,"/overlapping_gene.csv"),row.names = FALSE)
## NULL

What are those three that occur in all 5 comparisons

head(GENEoverlap[GENEoverlap$Count == 5, 1:2])
##      Gene Direction
## 82  Calm2        UP
## 192   Fau      DOWN
## 537  Tpt1      DOWN

Lets apply a logFC cut-off to see if there are any strong signals

## NULL

Calm2, Fau, and Tpt1 are retained. What are the other 29 (32-3) that occur in the other comparisons?

genes29 <- intersect(intersect(rownames(hcDE_list[["Group2vsGroup1"]]), 
          rownames(hcDE_list[["WTvsNULL"]])),
          rownames(hcDE_list[["WTvsHET"]]))

GENEoverlap[GENEoverlap$Gene %in% genes29, c(1:2)]
##         Gene Direction
## 13     Actg1      DOWN
## 47    Atp5g2      DOWN
## 49     Atp5h      DOWN
## 82     Calm2        UP
## 121    Cox7b      DOWN
## 159   Eef1a1      DOWN
## 163    Eef1g      DOWN
## 186    Fabp5      DOWN
## 192      Fau      DOWN
## 206    Gapdh      DOWN
## 222     Gpx4      DOWN
## 252 Hsp90ab1      DOWN
## 268    Kif1a      DOWN
## 289   Malat1      DOWN
## 293     Mapt      DOWN
## 295 Marcksl1        UP
## 310   Mllt11        UP
## 325     Naca      DOWN
## 343   Ndufb9      DOWN
## 352     Nnat      DOWN
## 370     Pcp4      DOWN
## 373    Pebp1      DOWN
## 375    Pfdn5      DOWN
## 411     Ptma      DOWN
## 485    Snrpn        UP
## 506    Stmn3      DOWN
## 514     Tbca      DOWN
## 537     Tpt1      DOWN
## 556      Ubc        UP
## 568    Uqcrh      DOWN
## 576    Vamp2        UP

Interesting, are there any DEX genes that we saw in the L6 Tbr1 paper?

bulk_L6 <- read.table("../raw_data/L6_Null_vs_L6_WT-_List_of_significant_genes.txt", header = T)

vennPlot1 = venn.diagram(list(single_cell_L5_NULL =  rownames(DE_list[["WTvsNULL"]]),
                              bulk_seq_L6_NULL = bulk_L6$GeneID),
                        fill = c("blue", "red"), main="Overlapping DEX with L6 bulk paper",
                        alpha = c(0.5, 0.5), cex = 2, filename = NULL)
grid.arrange(grobTree(vennPlot1), ncol=1)

tmp <- bulk_L6[bulk_L6$GeneID %in% intersect(rownames(DE_list[["WTvsNULL"]]), bulk_L6$GeneID), ]
a <- DE_list[["WTvsNULL"]]
tmp2 <- a[a$gene %in% intersect(rownames(DE_list[["WTvsNULL"]]), bulk_L6$GeneID), ]

tmp3 <- merge(tmp[,c("GeneID","FDR","logFC")], tmp2[,c("gene","fdr","logfc")], by.x= "GeneID", by.y="gene", all=T)
colnames(tmp3) <- c("gene","bulk_FDR","bulk_logFC","sc_FDR","sc_logFC")
tmp3
##       gene     bulk_FDR bulk_logFC        sc_FDR    sc_logFC
## 1     Crym 6.799982e-03  0.4852461  1.815403e-03 -0.24866363
## 2     Dok5 4.619205e-02 -0.4475063  8.966553e-03  0.24263099
## 3    Eif1b 7.052720e-04 -0.6263932  3.430163e-02 -0.39010912
## 4  Igfbpl1 1.876577e-02 -0.3932694  1.212472e-03 -0.15968406
## 5    Mef2c 8.141727e-04 -0.4510263  7.464419e-05  0.64670311
## 6      Mn1 7.052720e-04 -0.4756535  2.956498e-03  0.21722292
## 7     Nrgn 3.005407e-05  1.4249697 1.130145e-109  2.23785540
## 8   Ptprz1 4.847611e-02  0.6078919  2.030031e-02 -0.30305609
## 9     Rorb 1.028230e-03 -0.8845394  3.822102e-04  0.26131865
## 10  Slc1a3 1.461498e-03  0.4096388  1.841557e-02 -0.31237367
## 11 Slc6a15 8.560334e-03  0.3977337  1.454119e-03  0.20207802
## 12   Tiam2 1.182788e-02 -0.4019967  4.513499e-02 -0.07731485
## 13  Tmsb4x 3.266334e-02  0.1551467  3.640713e-70 -0.49757955

Any ASD/NDD genes in this list?

ASD_99 <- read.csv("../raw_data/ASD_100_short.csv", header = F)
NDD_99 <- read.csv("../raw_data/NDD_Genes.csv")

vennPlot1 = venn.diagram(list(DE_single_cell = na.omit(toupper(GENEoverlap$Gene)),
                              NDD = NDD_99$Gene),
                        fill = c("blue", "red"), main="Overlapping with NDD list",
                        alpha = c(0.5, 0.5), cex = 2, filename = NULL)
vennPlot2 = venn.diagram(list(DE_single_cell = na.omit(toupper(GENEoverlap$Gene)),
                              ASD = ASD_99$V1),
                        fill = c("blue", "yellow"), main="Overlapping with ASD list",
                        alpha = c(0.5, 0.5), cex = 2, filename = NULL)
grid.arrange(grobTree(vennPlot1),grobTree(vennPlot2),  ncol=1)

Which ASD?

GENEoverlap[toupper(GENEoverlap$Gene) %in% intersect(na.omit(toupper(GENEoverlap$Gene)), ASD_99$V1), ]
##        Gene Direction Count                   Files
## 25     Ank2      DOWN     1                 WTvsHET
## 30    Ap2s1      DOWN     1                WTvsNULL
## 134  Ctnnb1      DOWN     2        WTvsHET,WTvsNULL
## 151  Dpysl2        UP     1                WTvsNULL
## 290   Map1a      DOWN     1                WTvsNULL
## 438    Rorb        UP     2 Group2vsGroup1,WTvsNULL
## 471 Smarcc2      DOWN     1                WTvsNULL

Which NDD?

GENEoverlap[toupper(GENEoverlap$Gene) %in% intersect(na.omit(toupper(GENEoverlap$Gene)), NDD_99$Gene), ]
##      Gene Direction Count                                      Files
## 58  Atxn1        UP     1                                   WTvsNULL
## 59  Atxn1      <NA>     2                     Group2vsGroup1,WTvsHET
## 131  Cst3      DOWN     1                                   WTvsNULL
## 132  Cst3        UP     2                     Group2vsGroup1,WTvsHET
## 261 Ift74      DOWN     1                                   WTvsNULL
## 293  Mapt      DOWN     3            Group2vsGroup1,WTvsHET,WTvsNULL
## 345 Nedd8      DOWN     3            Group2vsGroup1,WTvsHET,WTvsNULL
## 376  Pfn1        UP     3 Group2vsGroup1,Tbr1_WTvsTbr1_NULL,WTvsNULL
## 476  Snca        UP     3            Group2vsGroup1,WTvsHET,WTvsNULL
## 477  Sncb        UP     3            Group2vsGroup1,WTvsHET,WTvsNULL
## 563 Uchl1      DOWN     3            Group2vsGroup1,WTvsHET,WTvsNULL

What genes are unique to HET vs NULL

uniquetoNULL <- setdiff(rownames(DE_list[["WTvsNULL"]]), rownames(DE_list[["WTvsHET"]]))
uniquetoHET <- setdiff(rownames(DE_list[["WTvsHET"]]), rownames(DE_list[["WTvsNULL"]]))

het.df <- DE_list[["WTvsHET"]]
b<- het.df[uniquetoHET, ]
head(b[order(b$fdr, decreasing = F),], n=20)
##                        gene   Pr..Chisq.          fdr      logfc
## Tuba1a               Tuba1a 1.911346e-20 3.264760e-18  0.1673770
## Tceb2                 Tceb2 6.120574e-13 5.629359e-11 -0.7121796
## Marcks               Marcks 1.295289e-12 1.133220e-10 -0.8390952
## Ndn                     Ndn 4.561298e-11 3.676713e-09 -0.3908530
## Tmem256             Tmem256 5.204519e-11 4.148580e-09 -0.7801156
## Sec61g               Sec61g 8.864474e-10 6.174149e-08 -0.9917001
## Hint1                 Hint1 2.699119e-09 1.792915e-07 -0.8767860
## Minos1               Minos1 3.347817e-09 2.203417e-07 -0.8708139
## Dbi                     Dbi 4.172841e-09 2.672854e-07 -0.1646555
## Hist1h2al         Hist1h2al 1.034930e-08 6.292023e-07         NA
## Qk                       Qk 2.057214e-08 1.219707e-06 -0.2202969
## Nucks1               Nucks1 3.372360e-08 1.983058e-06 -0.3878726
## Mt1                     Mt1 6.952680e-08 3.866552e-06 -0.7155882
## Fdps                   Fdps 3.360401e-07 1.721966e-05  0.4362562
## Ncald                 Ncald 3.661267e-07 1.849713e-05  0.2516487
## 2700094K13Rik 2700094K13Rik 5.415229e-07 2.660880e-05 -0.4244777
## Timm8b               Timm8b 7.966073e-07 3.835477e-05 -0.7121666
## Glul                   Glul 1.158288e-06 5.395818e-05  0.6013919
## Nap1l1               Nap1l1 1.241027e-06 5.743955e-05 -0.2586481
## Trappc2l           Trappc2l 2.617065e-06 1.173426e-04 -0.2842538
null.df <- DE_list[["WTvsNULL"]]
a<- null.df[uniquetoNULL, ]
head(a[order(a$fdr, decreasing = F),], n=20)
##              gene   Pr..Chisq.          fdr      logfc
## Uba52       Uba52 5.207473e-67 5.336916e-64 -2.5052484
## Pfn1         Pfn1 6.618202e-21 1.217410e-18  1.4872062
## Agl           Agl 3.205446e-20 5.608748e-18  0.6186134
## AY036118 AY036118 1.109349e-13 1.243511e-11 -0.2515298
## Ybx1         Ybx1 1.737712e-13 1.917899e-11 -0.3990117
## Igfbp7     Igfbp7 2.499205e-12 2.561328e-10 -0.2888790
## Sparc       Sparc 9.226961e-12 8.945164e-10 -0.7863769
## R3hdm4     R3hdm4 2.075054e-11 1.958741e-09 -0.8919598
## Dynll1     Dynll1 9.878987e-11 8.337865e-09  0.5010743
## Cbx3         Cbx3 1.737398e-10 1.432655e-08  0.7007783
## Btf3         Btf3 2.676348e-10 2.133346e-08 -0.9204052
## Olig1       Olig1 3.127263e-10 2.465383e-08 -0.7150770
## Usmg5       Usmg5 5.009233e-10 3.823004e-08  0.2355280
## Vps8         Vps8 1.629184e-09 1.157204e-07 -0.5006708
## Pou3f1     Pou3f1 4.001282e-09 2.682729e-07  0.4624765
## Atp5k       Atp5k 4.061838e-09 2.698113e-07  0.3756602
## Polr2l     Polr2l 4.898630e-09 3.224108e-07  0.6796923
## Anp32b     Anp32b 1.259390e-08 7.925322e-07 -0.4574676
## Polr2k     Polr2k 4.598316e-08 2.703961e-06  0.5534761
## Zwint       Zwint 7.695634e-08 4.416678e-06  0.6169545

Hmm I could see looking into Uba52 maybe if that’s of interest. It looks like the effect is specific to NULL. Pfn1 (and other genes) are less convincing.

Of the HETvWT and NULLvWT does there look like there’s any dosage effect?

possible_dosage <- intersect(rownames(DE_list[["WTvsNULL"]]), rownames(DE_list[["WTvsHET"]]))

a <- merge(null.df[possible_dosage, c("gene","fdr","logfc")], het.df[possible_dosage, c("gene","fdr","logfc")], by = "gene")
colnames(a) <- c("gene","fdr_null","logFC_null","fdr_het","logfc_het")
a$diff <- a$logFC_null - a$logfc_het

head(a[order(abs(a$diff), decreasing = T), ], n=20)
##         gene      fdr_null logFC_null       fdr_het  logfc_het       diff
## 134     Nrgn 1.130145e-109  2.2378554  1.597285e-08  0.6945417  1.5433137
## 21     Basp1  9.515245e-14 -0.9106342  2.372135e-63 -2.1064983  1.1958641
## 114   Mllt11  8.207806e-20  1.2951943  1.635413e-54  2.3852800 -1.0900857
## 213    Uqcrq  7.534780e-17  0.6426170  2.257054e-06 -0.3696124  1.0122294
## 150    Ptgds  5.222333e-58 -1.3680205  2.476207e-35 -0.5572251 -0.8107953
## 194     Tma7  1.130763e-10 -0.4560004  1.211730e-20 -1.2526005  0.7966001
## 72     Fabp5  6.288732e-41 -1.3970972  5.758390e-62 -2.1288752  0.7317779
## 62    Eef1b2  6.293825e-27 -1.6585304  2.170457e-14 -0.9405753 -0.7179552
## 211    Uqcrb  1.868756e-02  0.2812153  1.004351e-10  0.9841242 -0.7029089
## 206      Ubb  5.457484e-37 -0.5727987  7.773452e-03  0.1139253 -0.6867240
## 53      Cst3  1.745072e-02 -0.2956878  2.390381e-26  0.3896798 -0.6853676
## 94    Hbb-bs  1.724861e-34 -0.9362930  2.488139e-20 -0.2729604 -0.6633326
## 142     Pfn2  5.074791e-03  0.2267480  7.250598e-11  0.8840518 -0.6573038
## 137     Pcp4  5.229309e-35 -2.0342251  5.180682e-17 -1.4143072 -0.6199178
## 74       Fau 4.282121e-101 -1.5124906 4.070989e-151 -2.1048027  0.5923121
## 51       Cpe  9.719924e-03  0.5255107  1.319008e-08  1.0234515 -0.4979409
## 110 Marcksl1  7.081025e-18  1.1158453  1.477159e-32  1.6049053 -0.4890600
## 24     Bola2  4.217817e-02 -0.2971507  2.444771e-07 -0.7841082  0.4869575
## 5    Anapc13  4.554266e-02 -0.1114037  2.426204e-07 -0.5979225  0.4865189
## 91     H3f3a  5.071443e-19 -1.2427567  2.215070e-08 -0.7723629 -0.4703938

That Nrgn plot looks pretty convincing to me.

GO enrichment

## 6704 470

## 6854 320

## 6951 223

Another way to visualize the fold changes + go enrichment

## Using Term as id variables

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin17.7.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS/LAPACK: /usr/local/Cellar/openblas/0.3.6_1/lib/libopenblasp-r0.3.6.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] stats4    parallel  grid      stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] org.Mm.eg.db_3.8.2                      
##  [2] Seurat_2.3.4                            
##  [3] Matrix_1.2-17                           
##  [4] cowplot_1.0.0                           
##  [5] GO.db_3.8.2                             
##  [6] ggplot2_3.2.1                           
##  [7] goseq_1.36.0                            
##  [8] geneLenDataBase_1.20.0                  
##  [9] BiasedUrn_1.07                          
## [10] TxDb.Mmusculus.UCSC.mm10.knownGene_3.4.7
## [11] TxDb.Mmusculus.UCSC.mm10.ensGene_3.4.0  
## [12] GenomicFeatures_1.36.4                  
## [13] AnnotationDbi_1.46.1                    
## [14] Biobase_2.44.0                          
## [15] GenomicRanges_1.36.1                    
## [16] GenomeInfoDb_1.20.0                     
## [17] IRanges_2.18.3                          
## [18] S4Vectors_0.22.1                        
## [19] BiocGenerics_0.30.0                     
## [20] reshape2_1.4.3                          
## [21] plyr_1.8.4                              
## [22] gridExtra_2.3                           
## [23] VennDiagram_1.6.20                      
## [24] futile.logger_1.4.3                     
## [25] knitr_1.25                              
## 
## loaded via a namespace (and not attached):
##   [1] snow_0.4-3                  backports_1.1.5            
##   [3] Hmisc_4.2-0                 igraph_1.2.4.1             
##   [5] lazyeval_0.2.2              splines_3.6.0              
##   [7] BiocParallel_1.18.1         digest_0.6.22              
##   [9] foreach_1.4.7               htmltools_0.4.0            
##  [11] lars_1.2                    gdata_2.18.0               
##  [13] magrittr_1.5                checkmate_1.9.4            
##  [15] memoise_1.1.0               cluster_2.1.0              
##  [17] mixtools_1.1.0              ROCR_1.0-7                 
##  [19] Biostrings_2.52.0           matrixStats_0.55.0         
##  [21] R.utils_2.9.0               prettyunits_1.0.2          
##  [23] colorspace_1.4-1            blob_1.2.0                 
##  [25] xfun_0.10                   dplyr_0.8.3                
##  [27] jsonlite_1.6                crayon_1.3.4               
##  [29] RCurl_1.95-4.12             zeallot_0.1.0              
##  [31] zoo_1.8-6                   survival_2.44-1.1          
##  [33] iterators_1.0.12            ape_5.3                    
##  [35] glue_1.3.1                  gtable_0.3.0               
##  [37] zlibbioc_1.30.0             XVector_0.24.0             
##  [39] DelayedArray_0.10.0         kernlab_0.9-27             
##  [41] prabclus_2.3-1              DEoptimR_1.0-8             
##  [43] scales_1.0.0                futile.options_1.0.1       
##  [45] DBI_1.0.0                   bibtex_0.4.2               
##  [47] Rcpp_1.0.2                  metap_1.1                  
##  [49] dtw_1.21-3                  progress_1.2.2             
##  [51] htmlTable_1.13.2            reticulate_1.13            
##  [53] proxy_0.4-23                foreign_0.8-72             
##  [55] bit_1.1-14                  mclust_5.4.5               
##  [57] SDMTools_1.1-221.1          Formula_1.2-3              
##  [59] tsne_0.1-3                  htmlwidgets_1.5.1          
##  [61] httr_1.4.1                  gplots_3.0.1.1             
##  [63] RColorBrewer_1.1-2          fpc_2.2-3                  
##  [65] acepack_1.4.1               modeltools_0.2-22          
##  [67] ica_1.0-2                   pkgconfig_2.0.3            
##  [69] XML_3.98-1.20               R.methodsS3_1.7.1          
##  [71] flexmix_2.3-15              nnet_7.3-12                
##  [73] tidyselect_0.2.5            labeling_0.3               
##  [75] rlang_0.4.1                 munsell_0.5.0              
##  [77] tools_3.6.0                 RSQLite_2.1.2              
##  [79] ggridges_0.5.1              evaluate_0.14              
##  [81] stringr_1.4.0               yaml_2.2.0                 
##  [83] npsurv_0.4-0                bit64_0.9-7                
##  [85] fitdistrplus_1.0-14         robustbase_0.93-5          
##  [87] caTools_1.17.1.2            purrr_0.3.3                
##  [89] RANN_2.6.1                  pbapply_1.4-2              
##  [91] nlme_3.1-141                formatR_1.7                
##  [93] R.oo_1.23.0                 biomaRt_2.40.5             
##  [95] hdf5r_1.3.0                 compiler_3.6.0             
##  [97] rstudioapi_0.10             png_0.1-7                  
##  [99] lsei_1.2-0                  tibble_2.1.3               
## [101] stringi_1.4.3               lattice_0.20-38            
## [103] vctrs_0.2.0                 lifecycle_0.1.0            
## [105] pillar_1.4.2                lmtest_0.9-37              
## [107] Rdpack_0.11-0               irlba_2.3.3                
## [109] data.table_1.12.6           bitops_1.0-6               
## [111] gbRd_0.4-11                 rtracklayer_1.44.4         
## [113] R6_2.4.0                    latticeExtra_0.6-28        
## [115] KernSmooth_2.23-16          codetools_0.2-16           
## [117] lambda.r_1.2.4              MASS_7.3-51.4              
## [119] gtools_3.8.1                assertthat_0.2.1           
## [121] SummarizedExperiment_1.14.1 withr_2.1.2                
## [123] GenomicAlignments_1.20.1    Rsamtools_2.0.3            
## [125] GenomeInfoDbData_1.2.1      diptest_0.75-7             
## [127] mgcv_1.8-30                 doSNOW_1.0.18              
## [129] hms_0.5.2                   rpart_4.1-15               
## [131] tidyr_1.0.0                 class_7.3-15               
## [133] rmarkdown_1.16              segmented_1.0-0            
## [135] Rtsne_0.15                  base64enc_0.1-3